1 and 2. Look at the chapter on interactive graphics and, specifically, the code to display a subject's MRICloud data as a sunburst plot. Do the following. Display this subject's data as a Sankey diagram. Display as many levels as you can for type = 1, starting from the intracranial volume. Put this in a file called hw4.ipynb.
import pandas as pd
import plotly.express as px
import numpy as np
import os
import plotly.graph_objects as go
## Load the ROI hierarchy lookup table
url = "https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt"

# Raw column name -> name used in the rest of the notebook. The order of this
# mapping is also the final column order (finest region first, coarsest last).
column_names = {
    "modify": "roi",
    "modify.1": "level4",
    "modify.2": "level3",
    "modify.3": "level2",
    "modify.4": "level1",
}

# Read the tab-separated table, discard the Level5 column, rename the
# "modify*" columns and keep only the renamed ones.
multilevel_lookup = (
    pd.read_csv(url, sep="\t")
    .drop(columns=['Level5'])
    .rename(columns=column_names)
)[list(column_names.values())]
multilevel_lookup.head()
| | roi | level4 | level3 | level2 | level1 |
|---|---|---|---|---|---|
| 0 | SFG_L | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L |
| 1 | SFG_R | SFG_R | Frontal_R | CerebralCortex_R | Telencephalon_R |
| 2 | SFG_PFC_L | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L |
| 3 | SFG_PFC_R | SFG_R | Frontal_R | CerebralCortex_R | Telencephalon_R |
| 4 | SFG_pole_L | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L |
## Load the measurements and reduce them to a single subject
id = 127
subjectData = pd.read_csv("kirby21AllLevels.csv")

# Keep the type 1, level 5 rows of this subject, and only the region (ROI)
# and volume columns.
is_selected = (
    (subjectData.type == 1)
    & (subjectData.level == 5)
    & (subjectData.id == id)
)
subjectData = subjectData.loc[is_selected, ['roi', 'volume']]

## Attach the hierarchy columns to every ROI
subjectData = subjectData.merge(multilevel_lookup, on="roi")

# icv is a constant label shared by all rows (the root of the hierarchy);
# comp is each row's volume as a fraction of the summed volume of all rows.
subjectData = subjectData.assign(
    icv="ICV",
    comp=subjectData.volume / subjectData.volume.sum(),
)
subjectData.head()
| | roi | volume | level4 | level3 | level2 | level1 | icv | comp |
|---|---|---|---|---|---|---|---|---|
| 0 | SFG_L | 12926 | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L | ICV | 0.009350 |
| 1 | SFG_R | 10050 | SFG_R | Frontal_R | CerebralCortex_R | Telencephalon_R | ICV | 0.007270 |
| 2 | SFG_PFC_L | 12783 | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L | ICV | 0.009247 |
| 3 | SFG_PFC_R | 11507 | SFG_R | Frontal_R | CerebralCortex_R | Telencephalon_R | ICV | 0.008324 |
| 4 | SFG_pole_L | 3078 | SFG_L | Frontal_L | CerebralCortex_L | Telencephalon_L | ICV | 0.002227 |
subjectData = subjectData.drop(columns=['volume'])

# One (parent, child) pair per step down the hierarchy, from the root label
# to the individual ROI.
level_pairs = [
    ('icv', 'level1'),
    ('level1', 'level2'),
    ('level2', 'level3'),
    ('level3', 'level4'),
    ('level4', 'roi'),
]

# For each pair, tally how many rows fall under every parent/child combination.
# count() returns the number of non-null comp entries per group; it does not
# add the comp fractions together.
# NOTE(review): if link widths are meant to reflect volume share rather than
# the number of ROIs, sum() would be needed here - confirm the intent.
df2, df3, df4, df5, df6 = (
    subjectData.groupby([parent, child], as_index=False)['comp'].count()
    for parent, child in level_pairs
)
df6.head()
| | level4 | roi | comp |
|---|---|---|---|
| 0 | AG_L | AG_L | 1 |
| 1 | AG_R | AG_R | 1 |
| 2 | ALIC_L | ALIC_L | 1 |
| 3 | ALIC_R | ALIC_R | 1 |
| 4 | Amyg_L | Amyg_L | 1 |
# Give every per-level table the same column names so they can be stacked:
# 'a' is the parent node, 'b' the child node, 'comp' the link value.
link_tables = [df2, df3, df4, df5, df6]
for link_table in link_tables:
    link_table.columns = ['a', 'b', 'comp']

# Stack all levels into one long parent/child table.
# DataFrame.append was removed in pandas 2.0, so pd.concat is used instead.
# Like append, concat keeps each table's own row index by default, so index
# values repeat across levels.
df10 = pd.concat(link_tables)
df10
| | a | b | comp |
|---|---|---|---|
| 0 | ICV | CSF | 32 |
| 1 | ICV | Diencephalon_L | 8 |
| 2 | ICV | Diencephalon_R | 8 |
| 3 | ICV | Mesencephalon | 8 |
| 4 | ICV | Metencephalon | 20 |
| ... | ... | ... | ... |
| 269 | midbrain_R | Midbrain_R | 1 |
| 270 | midbrain_R | RedNc_R | 1 |
| 271 | midbrain_R | Snigra_R | 1 |
| 272 | post_DPWM_L | PCR_L | 1 |
| 273 | post_DPWM_R | PCR_R | 1 |
500 rows × 3 columns
import chart_studio.plotly as py
def genSankey(df, cat_cols=None, value_cols='', title='Sankey Diagram'):
    """Build a plotly Sankey figure specification from a DataFrame.

    Every consecutive pair of columns in ``cat_cols`` is read as a set of
    (source, target) links, with ``value_cols`` giving the link value. Links
    that share the same source and target are merged and their values summed.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the category columns and the value column.
    cat_cols : sequence of str
        Column names ordered from the left-most to the right-most level.
        At least two are required.
    value_cols : str
        Name of the single column holding the link values.
    title : str
        Title placed in the figure layout.

    Returns
    -------
    dict
        ``{'data': [sankey_trace], 'layout': layout}``, suitable for passing
        to ``plotly.graph_objects.Figure``.

    Raises
    ------
    ValueError
        If fewer than two category columns are given (no link can be formed).
    """
    cat_cols = [] if cat_cols is None else list(cat_cols)
    if len(cat_cols) < 2:
        raise ValueError(
            "genSankey needs at least two category columns in cat_cols"
        )

    # One colour per category column; the palette is cycled when there are
    # more columns than colours.
    colorPalette = ['#FFD43B', '#646464', '#4B8BBE', '#306998']

    # Map each distinct label to the colour of the first column it appears in.
    # A dict keeps labels unique and in first-seen order, so the label and
    # colour lists always have the same length and line up one-to-one even
    # when a label occurs in several columns.
    nodeColors = {}
    for idx, catCol in enumerate(cat_cols):
        levelColor = colorPalette[idx % len(colorPalette)]
        for label in dict.fromkeys(df[catCol]):
            nodeColors.setdefault(label, levelColor)
    labelList = list(nodeColors)
    colorList = list(nodeColors.values())
    labelIndex = {label: pos for pos, label in enumerate(labelList)}

    # Transform df into source-target pairs, one frame per adjacent column pair.
    pairFrames = []
    for sourceCol, targetCol in zip(cat_cols, cat_cols[1:]):
        # Copy before renaming so the caller's frame is never touched.
        pairDf = df[[sourceCol, targetCol, value_cols]].copy()
        pairDf.columns = ['source', 'target', 'count']
        pairFrames.append(pairDf)
    sourceTargetDf = (
        pd.concat(pairFrames)
        .groupby(['source', 'target'])
        .agg({'count': 'sum'})
        .reset_index()
    )

    # Plotly refers to nodes by their position in the label list.
    sourceTargetDf['sourceID'] = sourceTargetDf['source'].map(labelIndex)
    sourceTargetDf['targetID'] = sourceTargetDf['target'].map(labelIndex)

    # Sankey trace: node styling plus the link endpoints and values.
    data = dict(
        type='sankey',
        node=dict(
            pad=15,
            thickness=20,
            line=dict(
                color="black",
                width=0.5
            ),
            label=labelList,
            color=colorList
        ),
        link=dict(
            source=sourceTargetDf['sourceID'],
            target=sourceTargetDf['targetID'],
            value=sourceTargetDf['count']
        )
    )

    layout = dict(
        title=title,
        font=dict(
            size=10
        )
    )

    fig = dict(data=[data], layout=layout)
    return fig
import plotly
# Build the Sankey specification from the stacked parent ('a') -> child ('b')
# table, using 'comp' as the link value.
sankey_spec = genSankey(
    df10,
    cat_cols=['a', 'b'],
    value_cols='comp',
    title='Sankey Diagram of Brain Regions',
)
fig = go.Figure(sankey_spec)

# Fixed, tall canvas instead of plotly's automatic sizing.
layout_options = dict(
    autosize=False,
    width=1000,
    height=2000,
    paper_bgcolor="LightSteelBlue",
)
fig.update_layout(**layout_options)

# Alternative outputs left disabled:
#   plotly.offline.plot(fig, validate=False)
#   fig.to_html()
fig.show()